import time
import random
import argparse
import sys
import os

# Make sure this script's own directory is on the module search path
# (presumably so the bomberman_gym module next to it can be imported).
current_dir = os.path.split(os.path.abspath(__file__))[0]
if current_dir not in sys.path:
    sys.path.extend([current_dir])

# The environment classes are required; report the failure and stop if absent.
try:
    from bomberman_gym import BombermanEnv, BombermanAction
except ImportError as import_error:
    print(
        f"导入错误: {import_error}",
        "请确保 bomberman_gym.py 和 classic_bomberman-daiceshi.py 文件位于正确的位置",
        sep="\n",
    )
    sys.exit(1)

# Chance per step that a living player places a bomb instead of moving.
_BOMB_PROBABILITY = 0.2
# Single-cell (dx, dy) offsets considered for a move, in evaluation order.
_NEIGHBOR_OFFSETS = ((0, 1), (1, 0), (0, -1), (-1, 0))
# Inclusive upper bounds of the random fallback target used when no
# neighbouring cell is free of bombs and flames.
# NOTE(review): presumably the board is 13x11 cells -- confirm against the env.
_FALLBACK_MAX_X = 12
_FALLBACK_MAX_Y = 10
# Pause between rendered steps so the game can be followed by eye.
_RENDER_DELAY_SECONDS = 0.1


def _listed_cells(layer):
    """Return the first ``layer['count']`` (x, y) pairs stored in *layer*."""
    xs = layer['positions_x']
    ys = layer['positions_y']
    return [(xs[i], ys[i]) for i in range(layer['count'])]


def _choose_move_target(player, hazards):
    """Pick a move target for *player*, preferring hazard-free neighbours."""
    here_x = player['position_x']
    here_y = player['position_y']
    candidates = [(here_x + dx, here_y + dy) for dx, dy in _NEIGHBOR_OFFSETS]
    # List membership compares with ``==``, so coordinates need not be hashable.
    safe_cells = [cell for cell in candidates if cell not in hazards]
    if safe_cells:
        return random.choice(safe_cells)
    # Every neighbour holds a bomb or a flame: fall back to a random target.
    return random.randint(0, _FALLBACK_MAX_X), random.randint(0, _FALLBACK_MAX_Y)


def _random_actions(state, num_players):
    """Build the action dict for one step: one entry per living player."""
    # Bomb and flame cells are the same for every player within a step, so
    # they are collected once here rather than per player and per direction.
    hazards = _listed_cells(state['bombs']) + _listed_cells(state['flames'])

    actions = {}
    for player_id in range(num_players):
        player = state['players'][player_id]
        if player['alive'] == 0:
            # Dead players submit no action.
            continue

        if random.random() < _BOMB_PROBABILITY:
            action_type = BombermanAction.PLACE_BOMB.value
            # The target is sent as (0, 0) for bomb placement.
            target_x, target_y = 0, 0
        else:
            action_type = BombermanAction.MOVE.value
            target_x, target_y = _choose_move_target(player, hazards)

        actions[player_id] = {
            'action_type': action_type,
            'target_x': target_x,
            'target_y': target_y,
        }
    return actions


def random_agent_demo(episodes=5, steps_per_episode=200, render=True):
    """Run a demo in which every player acts randomly.

    Each step, every living player places a bomb with probability 0.2 and
    otherwise moves to a randomly chosen neighbouring cell that currently
    holds neither a bomb nor a flame (or to a random target if no such
    neighbour exists). Per-step rewards and per-episode totals are printed.

    Args:
        episodes: Number of episodes to play.
        steps_per_episode: Maximum number of steps in one episode; an episode
            ends earlier when the environment reports terminated/truncated.
        render: When true the environment is created with
            ``render_mode='human'`` and a short delay follows each step.

    Returns:
        None. If the environment cannot be created, the error is printed and
        the function returns without running any episode.
    """
    try:
        env = BombermanEnv(render_mode='human' if render else None)
    except Exception as e:
        print(f"创建环境时出错: {e}")
        return

    print("环境创建成功!")

    # The finally clause guarantees the environment is closed even when
    # reset()/step() raises or the user interrupts the demo.
    try:
        for episode in range(episodes):
            print(f"\nEpisode {episode+1}/{episodes}")
            obs, info = env.reset()
            total_rewards = {i: 0.0 for i in range(env.num_players)}

            for step in range(steps_per_episode):
                actions = _random_actions(obs['state'], env.num_players)

                obs, rewards, terminated, truncated, info = env.step(actions)

                for player_id, reward in rewards.items():
                    total_rewards[player_id] += reward

                print(f"Step {step+1}: Rewards: {rewards}")

                if terminated or truncated:
                    break

                if render:
                    time.sleep(_RENDER_DELAY_SECONDS)

            print(f"Episode {episode+1} ended. Total rewards: {total_rewards}")
    finally:
        env.close()

if __name__ == "__main__":
    # Command-line entry point: parse the demo options and run the demo.
    parser = argparse.ArgumentParser(description='Bomberman Gym Random Agent Demo')
    parser.add_argument('--episodes', type=int, default=5, help='Number of episodes to run')
    parser.add_argument('--steps', type=int, default=200, help='Maximum steps per episode')
    parser.add_argument('--no-render', action='store_true', help='Disable rendering')

    args = parser.parse_args()

    try:
        random_agent_demo(
            episodes=args.episodes,
            steps_per_episode=args.steps,
            render=not args.no_render
        )
    except Exception as e:
        print(f"运行时错误: {e}")
        import traceback
        traceback.print_exc()
        # Report the failure to the calling shell instead of exiting with 0,
        # matching the non-zero exit used when the project import fails.
        sys.exit(1)
